# This file assesses the relationship between perceptions of local communities
# and outcomes conditional on DA itself. So we have many pairs, but also a few
# DAs with many (upto to 10) respondents.

library(here)
library(lme4)
library(tidyverse)
library(estimatr)
library(fixest)

load(here("Data", "sameDAdat.rda"), verbose = TRUE)
nrow(sameDAdat)
# [1] 1894
table(table(sameDAdat$dauid))
#
#   2   3   4   5   6   9
# 674 131  20   8   4   1

sameDAdat <- sameDAdat %>%
  filter(!is.na(social.capital01) & !is.na(community.resp01)) %>%
  group_by(dauid) %>%
  mutate(
    numda = n(),
    perc_rank = rank(vm.community.subj) - 1,
    tied = length(unique(vm.community.subj)) == 1,
    scmd = social.capital01 - mean(social.capital01),
    cemd = community.resp01 - mean(community.resp01),
    vmsubjmd = vm.community.subj - mean(vm.community.subj),
    vmnorm2md = vm.community.norm2 - mean(vm.community.norm2)
  ) %>%
  ungroup() %>%
  filter(numda > 1) %>%
  select(
    vcid, dauid, vm.community.subj, vm.community.norm2, perc_rank,
    tied, scmd, cemd, vmsubjmd, vmnorm2md, social.capital01, community.resp01,
    da_prop_vm_20pct_06
  ) %>%
  droplevels()
dim(sameDAdat)
# [1] 1894   42
table(table(sameDAdat$dauid))
#
#   2   3   4   5   6   9
# 674 131  20   8   4   1

## Checking to make sure that we are correct in not needing to adjust for da_vm in this analysis.
blah <- sameDAdat %>%
  group_by(dauid) %>%
  summarize(uniqdavm = length(unique(da_prop_vm_20pct_06))) %>%
  ungroup()
stopifnot(all(blah$uniqdavm == 1))

## Basic descriptions

## Number respondents
dim(sameDAdat)
# [1] 1894   42
## Number of DAS
length(unique(sameDAdat$dauid))
# [1] 838
## Number of people per DA
table(sameDAdat$dauid)
table(table(sameDAdat$dauid))
#
#   2   3   4   5   6   9
# 674 131  20   8   4   1
20 + 8 + 4 + 1
# [1] 33


lmer1 <- lmer(social.capital01 ~ vm.community.norm2 + (1 | dauid), data = sameDAdat)
summary(lmer1)

lmer2 <- lmer(community.resp01 ~ vm.community.norm2 + (1 | dauid), data = sameDAdat)
summary(lmer2)

lmer1res <- c(Estimate = fixef(lmer1)[[2]], ci = confint(lmer1, level = .95, parm = 4))
lmer2res <- c(Estimate = fixef(lmer2)[[2]], ci = confint(lmer2, level = .95, parm = 4))

res1SameDA_mlm <- rbind(
  lmer1 = lmer1res, # social.capital01
  lmer2 = lmer2res # community.resp01
)
row.names(res1SameDA_mlm) <- c(
  "Social Cohesion",
  "Collective Efficacy"
)

## Only used before we used the truncated perceptions measure
## library(predictmeans)
## cooksDlmer1 <- CookD(lmer1, group = "dauid") ## 48020260
## cooksDlmer2 <- CookD(lmer2, group = "dauid") ## 48020260

## Out of bounds perceptions. Trying to use norm2
## sameDAdat %>% filter(dauid == 48020260)

## The fixed effects estimators show the same substantive results:
lmfe1 <- lm_robust(social.capital01 ~ vm.community.norm2, fixed_effects = ~dauid, data = sameDAdat)
lmfe2 <- lm_robust(community.resp01 ~ vm.community.norm2, fixed_effects = ~dauid, data = sameDAdat)

lmfe1tidy <- tidy(lmfe1)
lmfe2tidy <- tidy(lmfe2)

lmfe1res <- c(Estimate = lmfe1tidy[["estimate"]], ci = c(lmfe1tidy[["conf.low"]], lmfe1tidy[["conf.high"]]))
lmfe2res <- c(Estimate = lmfe2tidy[["estimate"]], ci = c(lmfe2tidy[["conf.low"]], lmfe2tidy[["conf.high"]]))

res1SameDA <- rbind(
  "Social Cohesion" = lmfe1res, # social.capital01
  "Collective Efficacy" = lmfe2res # community.resp01
)

## The fixed effects model allows for a lot more variability in the intercepts
## (most have very little information avaiable to them)

lmer1_rs <- lmer(social.capital01 ~ vm.community.norm2 + (1 + vm.community.norm2 | dauid), data = sameDAdat)

library(brms)
blmer1_social_cohesion <- brm(social.capital01 ~ vm.community.norm2 + (1 + vm.community.norm2 | dauid),
  data = sameDAdat,
  chains = 8, iter = 4000, cores = 8,
  control = list(adapt_delta = 0.99, max_treedepth = 20)
)
summary(blmer1_social_cohesion)
save(blmer1_social_cohesion, file = here("Analysis", "blmer1_sameDA_sc.rda"))

## Reminder that we have mostly DAs of size 2 but a few that are larger
table(table(sameDAdat$dauid))
## Fixed effects will weight by variance of vm.community.norm2 within DA plus size of DA.
## Multilevel models will also weight but using models.

lmfe1c <- feols(social.capital01 ~ vm.community.norm2 | dauid, data = sameDAdat)

sc_fes2 <- fixef(lmfe1c)
sc_fes <- lmfe1$fixed_effects
sc_res <- coef(lmer1)
sc_lmer_rs <- coef(lmer1_rs)
sc_brm_res <- coef(blmer1_social_cohesion)
sc_da_avgs <- sameDAdat %>%
  group_by(dauid) %>%
  summarize(sc_avg = mean(social.capital01), nda = n()) %>%
  ungroup()

sc_dat0 <- data.frame(
  sc_res = sc_res[["dauid"]][["(Intercept)"]],
  sc_fes = sc_fes,
  sc_fes_feols = sc_fes2$dauid,
  sc_lmer_rs_slopes = sc_lmer_rs[["dauid"]][["vm.community.norm2"]],
  sc_bayes_intercept = sc_brm_res[["dauid"]][, "Estimate", "Intercept"],
  sc_bayes_slope = sc_brm_res[["dauid"]][, "Estimate", "vm.community.norm2"],
  dauid = row.names(sc_res$dauid),
  dauid2 = dimnames(sc_fes)[[1]]
)

sc_dat <- left_join(sc_dat0, sc_da_avgs)
sc_dat <- sc_dat %>% mutate(big_das = factor(ifelse(nda > 3, dauid, NA)))

all.equal(sc_dat$sc_fes, sc_dat$sc_fes_feols)

g1 <- ggplot(data = sc_dat, aes(x = sc_fes, y = sc_res, color = big_das, size = nda)) +
  geom_point(alpha = .5) +
  theme_minimal()
g1

g2 <- ggplot(data = sc_dat, aes(x = sc_fes, y = sc_bayes_intercept, color = big_das, size = nda)) +
  geom_point(alpha = .5) +
  theme_minimal()
g2

## The fixed effects are nearly perfect predictors of the average sc in the da
g4 <- ggplot(data = sc_dat, aes(x = sc_fes, y = sc_avg, color = big_das, size = nda)) +
  geom_point()
g4

g5 <- ggplot(data = sc_dat, aes(x = sc_lmer_rs_slopes, y = sc_bayes_slope, color = big_das, size = nda)) +
  geom_point(alpha = .5) +
  theme_minimal()
g5

## Look at correlation between intercepts and slopes: in general, higher intercept more negative slope
g6 <- ggplot(data = sc_dat, aes(x = sc_fes, y = sc_bayes_slope, color = big_das, size = nda)) +
  geom_point(alpha = .5) +
  theme_minimal()
g6

## Look at correlation between intercepts and slopes: in general, higher intercept more negative slope
g7 <- ggplot(data = sc_dat, aes(x = sc_fes, y = sc_lmer_rs_slopes, color = big_das, size = nda)) +
  geom_point(alpha = .5) +
  theme_minimal()
g7

# cor(sc_dat %>% select(-dauid, -dauid2, -big_das))

## Testing to make sure that "fixed_Effects" above do what we want:
lmfe1b <- lm(scmd ~ vmnorm2md, data = sameDAdat)
coef(lmfe1b)[2]

lmfe1c <- feols(social.capital01 ~ vm.community.norm2 | dauid, data = sameDAdat)
coef(lmfe1c)

## So they are all estimating the same thing
coef(lmfe1)

sameDAdat %>%
  filter(perc_rank %in% c(1.5, 3.5)) %>%
  arrange(dauid, perc_rank) %>%
  select(dauid, vm.community.subj, perc_rank)

sameDAdat %>%
  filter(dauid %in% c(12080117, 13100202, 35010293, 35090114, 35130063, 59290180)) %>%
  arrange(dauid, vm.community.subj) %>%
  select(dauid, vm.community.subj, perc_rank) %>%
  print(n = 100)

## Now using the ranks --- to talk about "higher perceivers"

## The fixed effects estimators show the same substantive results:
lmfe1rank <- lm_robust(social.capital01 ~ perc_rank, fixed_effects = ~dauid, data = sameDAdat)
lmfe2rank <- lm_robust(community.resp01 ~ perc_rank, fixed_effects = ~dauid, data = sameDAdat)

lmfe1rank_tidy <- tidy(lmfe1rank)
lmfe2rank_tidy <- tidy(lmfe2rank)

lmfe1rank_res <- c(
  Estimate = lmfe1rank_tidy[["estimate"]],
  ci = c(lmfe1rank_tidy[["conf.low"]], lmfe1rank_tidy[["conf.high"]])
)
lmfe2rank_res <- c(
  Estimate = lmfe2rank_tidy[["estimate"]],
  ci = c(lmfe2rank_tidy[["conf.low"]], lmfe2rank_tidy[["conf.high"]])
)

res1rank_SameDA <- rbind(
  "Social Cohesion" = lmfe1rank_res, # social.capital01
  "Collective Efficacy" = lmfe2rank_res # community.resp01
)

save(res1SameDA, res1SameDA_mlm, file = here("Analysis", "analysis_sameDA.rda"))
